In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from datetime import datetime
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import tkinter as tk
%matplotlib inline 
In [ ]:
#Data Resource: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE,https://github.com/CSSEGISandData/COVID-19
#Orginal version: 2/15/2020
#Version updated: 2/26/2020
    #Added latest confirm map(used iso 3-letter country code)
    #Added animation on confirm by regions bar charts
    #Changed maps' visual designs, rearranged color scales
#Latest version updated: 3/14/2020
    #Added User Interaction Program
    #Added US COVID-19 Map 
In [2]:
confirmed_raw = pd.read_excel('time_series_19-covid-Confirmed.xlsx').fillna(0)
death_raw = pd.read_excel('time_series_19-covid-Deaths.xlsx').fillna(0)
recovered_raw = pd.read_excel('time_series_19-covid-Recovered.xlsx').fillna(0)
geodata = pd.read_csv('geocodedata.csv')
In [3]:
confirmed = pd.merge(confirmed_raw, geodata, how = 'left')
death = pd.merge(death_raw, geodata, how = 'left')
recovered = pd.merge(recovered_raw, geodata, how = 'left')
In [4]:
cols = list(confirmed.columns)
cols = cols[:4]+[cols[-1]]+cols[4:-1]
confirmed = confirmed[cols]
death = death[cols]
recovered = recovered[cols]
In [5]:
#convert column name to datetime format
confirmedtimeframe = confirmed.iloc[:,5:len(confirmed.columns)]
confirmedtimeframe.columns = pd.to_datetime(confirmedtimeframe.columns).date
deathtimeframe = death.iloc[:,5:len(death.columns)]
deathtimeframe.columns = pd.to_datetime(deathtimeframe.columns).date
recoveredtimeframe = recovered.iloc[:,5:len(recovered.columns)]
recoveredtimeframe.columns = pd.to_datetime(recoveredtimeframe.columns).date
In [6]:
confirmed = pd.concat([confirmed.iloc[:,0:5],confirmedtimeframe],axis = 1,sort=True)
death = pd.concat([death.iloc[:,0:5], deathtimeframe],axis = 1,sort=True)
recovered = pd.concat([recovered.iloc[:,0:5], recoveredtimeframe],axis = 1,sort=True)
In [7]:
#Melt indivdual timeframe data to values under 'Date'
confirmed = confirmed.melt(id_vars=["Province/State", "Country/Region",'Lat','Long','Code'], var_name="Date", value_name="Confirmed")
death = death.melt(id_vars=["Province/State", "Country/Region",'Lat','Long','Code'], var_name="Date", value_name="Death")
recovered = recovered.melt(id_vars=["Province/State", "Country/Region",'Lat','Long','Code'], var_name="Date", value_name="Recovered")
In [8]:
#Consolidate dataframe that's ready to be used
data = pd.concat([confirmed, death.iloc[:,6],recovered.iloc[:,6]], axis =1, sort = True)
In [28]:
#separate China region from and other regions worldwide
chinaregion = data.loc[(data['Country/Region'] == 'China')|(data['Country/Region'] == 'Taiwan')|
                                     (data['Country/Region'] =='Hong Kong')|(data['Country/Region'] =='Macau'),:]
ind = data.loc[(data['Country/Region'] == 'China')|(data['Country/Region'] == 'Taiwan')|
                                     (data['Country/Region'] =='Hong Kong')|(data['Country/Region'] =='Macau'),:].index
nonchinaregion = data.drop(data.index[ind],inplace = False).reset_index(drop = True)
In [29]:
chinaregionpivot = pd.pivot_table(chinaregion,index=["Country/Region","Province/State",'Date'], 
                                  values = ['Confirmed','Recovered','Death'],
                                  aggfunc = np.mean)
In [30]:
nonchinaregionpivot = pd.pivot_table(nonchinaregion,index=["Country/Region","Province/State",'Date'], 
                      values = ['Confirmed','Recovered','Death'],
                      aggfunc = np.mean)
In [31]:
Countries_affected = list(data['Country/Region'].unique())
usstates = nonchinaregion.loc[nonchinaregion['Country/Region'] == 'US']
canstates = nonchinaregion.loc[nonchinaregion['Country/Region'] == 'Canada']
ausstates = nonchinaregion.loc[nonchinaregion['Country/Region'] == 'Australia']
In [32]:
China_province_affected = list(chinaregion['Province/State'].unique())
US_state_affected = list(usstates['Province/State'].unique())
CAN_state_affected = list(canstates['Province/State'].unique())
AUS_state_affected = list(ausstates['Province/State'].unique())
In [14]:
#Function to extract timeframe table per location
#region: China: chinaregionpivot/nonchinaregionpivot
#country: Country/Region
#state: Province/State
def getlocationdata(region, country, state):
    location = region.loc[(country, state,), :]
    return location
In [20]:
def script():
    selection = input('Type in Country/Region:')
    if selection in Countries_affected:
        if selection == 'China':
            china_province = input('Type in Province in China:')
            if china_province not in China_province_affected:
                print('Input not available, please retry.')
            else:
                province = getlocationdata(chinaregionpivot,selection,china_province)
            
           
                print('Trend of Confirmed Cases:')
                plt.figure(figsize=(9,6))
                province.loc[:,'Confirmed'].sort_index().plot.line(color='b',marker='o',linestyle ='-.')
                plt.ylabel('Number of Patients')
                plt.title('Confirmed in '+ selection + ', '+ china_province)
                plt.legend()
                plt.show()


                print('Trend of Death/Confirmed Cases:')
                plt.figure(figsize=(9,6))
                province.loc[:,'Death'].sort_index().plot.line(color='r',marker='x',linestyle ='--')
                province.loc[:,'Recovered'].sort_index().plot.line(color='g',marker='o',linestyle ='--')
                plt.ylabel('Number of Patients')
                plt.title('Death and Recovery in '+ selection + ', '+ china_province)
                plt.legend()
                plt.show()
            
        
        elif selection == 'US':
            US_state = input('Type in a State in US:')
            if US_state not in US_state_affected:
                print('Input not available, please retry.')
            else:
                state = getlocationdata(nonchinaregionpivot,selection,US_state)


                print('Trend of Confirmed Cases:')
                plt.figure(figsize=(9,6))
                state.loc[:,'Confirmed'].sort_index().plot.line(color='b',marker='o',linestyle ='-.')
                plt.ylabel('Number of Patients')
                plt.title('Confirmed in ' + selection + ', '+ US_state)
                plt.legend()
                plt.show()


                print('Trend of Death/Confirmed Cases:')
                plt.figure(figsize=(9,6))
                state.loc[:,'Death'].sort_index().plot.line(color='r',marker='x',linestyle ='--')
                state.loc[:,'Recovered'].sort_index().plot.line(color='g',marker='o',linestyle ='--')
                plt.ylabel('Number of Patients')
                plt.title('Death and Recovery in '+ selection + ', '+ US_state)
                plt.legend()
                plt.show()

        elif selection == 'Canada':
            CAN_state = input('Type in a State in Canada:')
            if CAN_state not in CAN_state_affected:
                print('Input not available, please retry.')
            else:
                state = getlocationdata(nonchinaregionpivot,selection,CAN_state)


                print('Trend of Confirmed Cases:')
                plt.figure(figsize=(9,6))
                state.loc[:,'Confirmed'].sort_index().plot.line(color='b',marker='o',linestyle ='-.')
                plt.ylabel('Number of Patients')
                plt.title('Confirmed in ' + selection + ', '+ CAN_state)
                plt.legend()
                plt.show()


                print('Trend of Death/Confirmed Cases:')
                plt.figure(figsize=(9,6))
                state.loc[:,'Death'].sort_index().plot.line(color='r',marker='x',linestyle ='--')
                state.loc[:,'Recovered'].sort_index().plot.line(color='g',marker='o',linestyle ='--')
                plt.ylabel('Number of Patients')
                plt.title('Death and Recovery in '+ selection + ', '+ CAN_state)
                plt.legend()
                plt.show()

        elif selection == 'Australia':
            AUS_state = input('Type in a State in Australia:')
            if AUS_state not in AUS_state_affected:
                print('Input not available, please retry.')
            else:
                state = getlocationdata(nonchinaregionpivot,selection,AUS_state)


                print('Trend of Confirmed Cases:')
                plt.figure(figsize=(9,6))
                state.loc[:,'Confirmed'].sort_index().plot.line(color='b',marker='o',linestyle ='-.')
                plt.ylabel('Number of Patients')
                plt.title('Confirmed in ' + selection + ', '+ AUS_state)
                plt.legend()
                plt.show()               



                print('Trend of Death/Confirmed Cases:')
                plt.figure(figsize=(9,6))
                state.loc[:,'Death'].sort_index().plot.line(color='r',marker='x',linestyle ='--')
                state.loc[:,'Recovered'].sort_index().plot.line(color='g',marker='o',linestyle ='--')
                plt.ylabel('Number of Patients')
                plt.title('Death and Recovery in '+ selection + ', '+ AUS_state)
                plt.legend()
                plt.show()
        else:
            country = getlocationdata(nonchinaregionpivot,selection,0)


            print('Trend of Confirmed Cases:')
            plt.figure(figsize=(9,6))
            country.loc[:,'Confirmed'].sort_index().plot.line(color='b',marker='o',linestyle ='-.')
            plt.ylabel('Number of Patients')
            plt.title('Confirmed in ' + selection)
            plt.legend()
            plt.show()

            print('Trend of Death/Confirmed Cases:')
            plt.figure(figsize=(9,6))
            country.loc[:,'Death'].sort_index().plot.line(color='r',marker='x',linestyle ='--')
            country.loc[:,'Recovered'].sort_index().plot.line(color='g',marker='o',linestyle ='--')
            plt.ylabel('Number of Patients')
            plt.title('Death and Recovery in '+ selection)
            plt.legend()
            plt.show()
    else:
        print('Input not available, please retry.')
    
    restart = input("Would you like to restart this program?(y/n)")
    if restart == 'y':
        script()
    if restart == "n":
        print ("Ends program")
In [83]:
script()
Type in Country/Region:China
Type in Province in China:Hubei
Trend of Confirmed Cases:
Trend of Death/Confirmed Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:US
Type in a State in US:Washington
Trend of Confirmed Cases:
Trend of Death/Confirmed Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:Italy
Trend of Confirmed Cases:
Trend of Death/Confirmed Cases:
Would you like to restart this program?(y/n)y
Type in Country/Region:Japan
Trend of Confirmed Cases:
Trend of Death/Confirmed Cases:
Would you like to restart this program?(y/n)n
Ends program
In [33]:
chinaregion['Date'] = pd.to_datetime(chinaregion['Date'])
chinaregion['Date'] = chinaregion['Date'].dt.strftime('%b.%d')
nonchinaregion['Date'] = pd.to_datetime(nonchinaregion['Date'])
nonchinaregion['Date'] = nonchinaregion['Date'].dt.strftime('%b.%d')
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [34]:
#Get the latest data
chinalatest = pd.DataFrame(chinaregion.groupby(['Province/State','Country/Region'])['Lat','Long','Code','Confirmed','Recovered','Death'].agg('max')).sort_values(by=['Confirmed'], ascending = False).reset_index()
In [24]:
fig = px.bar(chinaregion, x='Province/State', y='Confirmed',color = 'Province/State',color_discrete_sequence= px.colors.qualitative.Set3,
             animation_frame="Date",animation_group="Province/State",hover_data=['Confirmed'], title = 'Confirmed in China Region')
fig.show()
In [ ]:
#fig = px.bar(chinalatest.loc[chinalatest['Province/State'] != 'Hubei',:], x='Province/State', y='Confirmed',color = 'Province/State',
#             color_discrete_sequence= px.colors.qualitative.Set3,
#             hover_data=['Confirmed'], title = 'Confirmed in China Region Excluding Hubei')
fig = px.bar(chinaregion.loc[chinaregion['Province/State'] != 'Hubei',:], x='Province/State', y='Confirmed',color = 'Province/State',
             color_discrete_sequence= px.colors.qualitative.Set3, animation_frame="Date",animation_group="Province/State",
             hover_data=['Confirmed'], title = 'Confirmed in China Region Excluding Hubei')
fig.show()
In [25]:
nonchinalatest = pd.DataFrame(nonchinaregion.groupby(['Country/Region'])['Lat','Long','Code','Confirmed','Recovered','Death'].agg('max')).sort_values(by = 'Confirmed', ascending = False).reset_index()
In [57]:
nonchinaregiontrend = pd.DataFrame(nonchinaregion.groupby(['Country/Region','Date'])['Date','Confirmed','Recovered','Death'].agg('sum')).reset_index()
In [60]:
#Others: Diamond Princess Cruise Ship
#fig = px.bar(nonchinalatest.loc[nonchinalatest['Country/Region'] != 'Others',:], x='Country/Region', y='Confirmed',color = 'Country/Region',
#             color_discrete_sequence= px.colors.qualitative.Set3,
#             hover_data=['Confirmed'], title = 'Confirmed outside China')
fig = px.bar(nonchinaregion, x='Country/Region', y='Confirmed',color = 'Country/Region',
             color_discrete_sequence= px.colors.qualitative.Set3,animation_frame="Date",animation_group="Country/Region",
             hover_data=['Confirmed'], title = 'Confirmed outside China')

fig.show()
In [61]:
chinalatest['Death Rate'] = chinalatest['Death']/chinalatest['Confirmed']
chinalatest['Recover Rate'] = chinalatest['Recovered']/chinalatest['Confirmed']
nonchinalatest['Death Rate'] = nonchinalatest['Death']/nonchinalatest['Confirmed']
nonchinalatest['Recover Rate'] = nonchinalatest['Recovered']/nonchinalatest['Confirmed']
In [62]:
#Death v.s. Confirmed in other part of China
fig = px.scatter(chinalatest.loc[chinalatest['Province/State'] != 'Hubei',:], x="Confirmed", y="Death", size = 'Recover Rate',
                 color="Province/State",color_discrete_sequence= px.colors.qualitative.Plotly,size_max=60,
                 title = 'Deaths in Confirmed in Other Part of China')
fig.show()
In [63]:
#Death v.s. Confirmed outside China
fig = px.scatter(nonchinalatest, x="Confirmed", y="Death", size = 'Recover Rate',
                 color="Country/Region",color_discrete_sequence= px.colors.qualitative.Plotly,size_max=60,
                title = 'Deaths in Confirmed outside China')
fig.show()
In [64]:
#region: China: chinaregionpivot/nonchinaregionpivot
#country: Country/Region
#state: Province/State (enter 0 if shows '0')
def getincremental(region, country, state): 
    incc = []
    incd = []
    incr = []
    for i in range(len(getlocationdata(region, country, state))-1):
        c = getlocationdata(region, country, state)['Confirmed'][i+1]-getlocationdata(region, country, state)['Confirmed'][i]
        d = getlocationdata(region, country, state)['Death'][i+1]-getlocationdata(region, country, state)['Death'][i]
        r = getlocationdata(region, country, state)['Recovered'][i+1]-getlocationdata(region, country, state)['Recovered'][i]
        incc.append(c)
        incd.append(d)
        incr.append(r)

    dic = {'Incremental Confirmed': incc, 'Incremental Death': incd, 'Incremental Recovered': incr}

    incremental = pd.DataFrame(data = dic)
    incremental['Day'] = np.arange(1,len(incremental)+1,1)
    return incremental
In [81]:
#Specify the region you want to explore
region = chinaregionpivot
country = 'China'
state = 'Hubei'
incremental = getincremental(region, country, state)
In [67]:
#Plot trend of incremental confirms 
fig = go.Figure()
fig.add_trace(go.Scatter(x = incremental['Day'], y=incremental['Incremental Confirmed'], name='Confirmed',
                         mode='lines+markers',line=dict(color='firebrick', width=2, dash='dash')))
if state != 0:
    fig.update_layout(title='Incremental Confirms in ' + state,
                       xaxis_title='Days',
                       yaxis_title='Confirms')
else:
    fig.update_layout(title='Incremental Confirms in ' + country,
                       xaxis_title='Days',
                       yaxis_title='Confirms')
fig.show()
In [68]:
fig = go.Figure()
fig.add_trace(go.Scatter(x = incremental['Day'], y=incremental['Incremental Death'],
                    name='Death', mode='lines+markers',line=dict(color='firebrick', width=2, dash='dot')))
fig.add_trace(go.Scatter(x = incremental['Day'], y=incremental['Incremental Recovered'],
                    name='Recovered', mode='lines+markers',line=dict(color='royalblue', width=2,dash='dot')))
if state != 0:
    fig.update_layout(title='Incremental Death and Recovery in ' + state,
                   xaxis_title='Days',
                   yaxis_title='Number of Patients')
else:
    fig.update_layout(title='Incremental Death and Recovery in ' + country,
                   xaxis_title='Days',
                   yaxis_title='Number of Patients')
fig.show()
In [69]:
#Spreading in all China regions
fig = px.scatter_geo(chinaregion, lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date",
                    hover_name="Province/State", size_max=20,
                    color_continuous_scale=[[0, 'rgb(255,160,122)'],
                             [0.01,"rgb(255,99,71)"], 
                             [0.02,"rgb(220,20,60)"],
                             [0.2,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]])
fig.update_geos(
    showcoastlines=True, coastlinecolor="Black",
    showland=True, landcolor="LightBlue",
    showocean=True, oceancolor="CornflowerBlue",
)
fig.update_layout(title='Confirmed in China thru Timeline')
fig.show()
In [70]:
#Spreading in other China regions (Excluding Hubei)
fig = px.scatter_geo(chinaregion.loc[chinaregion['Province/State'] != 'Hubei',:], lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date", 
                    hover_name="Province/State", size_max=10,
                    color_continuous_scale=[[0, 'rgb(255,160,122)'],
                             [0.1,"rgb(255,99,71)"], 
                             [0.2,"rgb(220,20,60)"],
                             [0.4,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]],)
fig.update_geos(
    showcoastlines=True, coastlinecolor="Black",
    showland=True, landcolor="LightBlue",
    showocean=True, oceancolor="CornflowerBlue",
)
fig.update_layout(title='Confirmed in China (excluding Hubei) thru Timeline')
fig.show()
In [71]:
#Spreading in all regions outside China
fig = px.scatter_geo(nonchinaregion, lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed',animation_frame="Date",
                    hover_name="Country/Region", size_max=20,
                    color_continuous_scale=[[0, 'rgb(255,160,122)'],
                             [0.01,"rgb(255,99,71)"], 
                             [0.02,"rgb(220,20,60)"],
                             [0.2,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]])
fig.update_geos(
    showcoastlines=True, coastlinecolor="Black",
    showland=True, landcolor="LightBlue",
    showocean=True, oceancolor="CornflowerBlue",
)
fig.update_layout(title='Confirmed outside China thru Timeline')
fig.show()
In [72]:
#Spreading in all named regions outside China
fig = px.scatter_geo(nonchinaregion.loc[(nonchinaregion['Country/Region'] != 'Others')|(nonchinaregion['Country/Region'] != 0),:], 
                     lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed', animation_frame="Date",
                    hover_name="Province/State", size_max=30,
                    color_continuous_scale=[[0, 'rgb(255,160,122)'],
                             [0.01,"rgb(255,99,71)"], 
                             [0.02,"rgb(220,20,60)"],
                             [0.2,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]],)
fig.update_geos(
    showcoastlines=True, coastlinecolor="Black",
    showland=True, landcolor="LightBlue",
    showocean=True, oceancolor="CornflowerBlue",
)
fig.update_layout(title='Confirmed Outside China thru Timeline (only includes named regions)')
fig.show()
In [78]:
usstate = nonchinaregion.loc[nonchinaregion['Country/Region'] == 'US',:]
In [80]:
fig = px.scatter_geo(usstate, lat ="Lat", lon = 'Long',
                    color="Confirmed",size = 'Confirmed',animation_frame="Date",
                    hover_name="Province/State", size_max=20,
                    color_continuous_scale=[[0, 'rgb(255,160,122)'],
                             [0.01,"rgb(255,99,71)"], 
                             [0.02,"rgb(220,20,60)"],
                             [0.2,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]])
fig.update_geos(
    showcoastlines=True, coastlinecolor="Black",
    showland=True, landcolor="LightBlue",
    showocean=True, oceancolor="CornflowerBlue",
)
fig.update_layout(title='Confirmed in US thru Timeline')
fig.show()
In [73]:
latestdata = pd.concat([chinalatest,nonchinalatest],axis = 0,sort=False)
latestbycode = pd.DataFrame(latestdata.groupby(['Code','Country/Region'])['Country/Region','Code','Confirmed','Recovered','Death'].agg('sum')).sort_values(by=['Confirmed'], ascending = False).reset_index()
In [74]:
fig = go.Figure(data=go.Choropleth(
                locations = latestbycode['Code'],
                z = latestbycode['Confirmed'],
                text = latestbycode['Country/Region'],
                colorscale = [[0, 'rgb(255,222,173)'],
                             [0.001,"rgb(255,99,71)"], 
                             [0.005,"rgb(165,42,42)"],
                             [0.1,"rgb(178,34,34)"],
                             [1.0,"rgb(128,0,0)"]],
                autocolorscale=False,
                reversescale=False,
                marker_line_color='darkgray',
                marker_line_width=0.5,
                colorbar_title = 'Confirmed',
))

fig.update_layout(title='Latest Worldwide Confirmed')

fig.show()
In [75]:
fig = go.Figure(data=go.Choropleth(
                locations = latestbycode.loc[latestbycode['Code'] != 'CHN',:]['Code'],
                z = latestbycode.loc[latestbycode['Code'] != 'CHN',:]['Confirmed'],
                text = latestbycode.loc[latestbycode['Code'] != 'CHN',:]['Country/Region'],
                colorscale = [[0, 'rgb(255,160,122)'],
                             [0.01,"rgb(255,99,71)"], 
                             [0.02,"rgb(220,20,60)"],
                             [0.2,"rgb(178,34,34)"],
                             [0.6,"rgb(165,42,42)"],
                             [0.8,"rgb(139,0,0)"],
                             [1.0,"rgb(128,0,0)"]],
                autocolorscale=False,
                reversescale=False,
                marker_line_color='darkgray',
                marker_line_width=0.5,
                colorbar_title = 'Confirmed',
))

fig.update_layout(title='Latest Confirmed Outside China')

fig.show()